iT邦幫忙

0

Python 分析 LINE 通話記錄

  • 分享至 

  • xImage
  •  

在 Colab 需要先上傳檔案

# Upload a file through the Colab file picker.
# Imported here so this cell runs on its own; the rest of the script only
# imports `files` at the very end.
from google.colab import files

uploaded_files = files.upload()

# The upload result is used as a dict keyed by file name; fail with a clear
# message instead of an IndexError when it is empty.
if not uploaded_files:
    raise ValueError("No file was uploaded")

# Take the name of the first uploaded file
uploaded_file_name = next(iter(uploaded_files))

# Decode the raw upload as UTF-8 text
# (optional, depending on how your file is encoded)
content = uploaded_files[uploaded_file_name].decode('utf-8')

# Save the text as line.txt so the analysis script can read it
with open('line.txt', 'w', encoding='utf-8') as file:
    file.write(content)

# Show the file name (for testing)
print(f"檔案名稱:{uploaded_file_name}")

=====

# Settings: the two participant names. They are compared against the tokens
# of the chat log, both to keep them out of the keyword chart and to look up
# each person's count for the message-volume chart.
# NOTE(review): "XXX" / "SSSS" look like placeholders — replace before running.
YOUR_NAME="XXX"
HER_NAME="SSSS"

# Install the required packages

!pip install jieba
!pip install cutecharts

import re

# Import the packages

import jieba
from datetime import datetime
from cutecharts.charts import Bar, Pie
from cutecharts.components import Page

# Read the chat log that was uploaded to Colab.
# A context manager closes the file handle as soon as the text is read.
with open('line.txt', 'r', encoding='utf-8') as f:
    content = f.read()

# Split the chat log into tokens with jieba
words = jieba.lcut(content)
counts = {}

# Count the tokens, skipping single-character tokens and pure digit strings
for word in words:
    if len(word) <= 1 or word.isdigit():
        continue
    counts[word] = counts.get(word, 0) + 1

# Remove tokens that are not interesting for the statistics
text = ' '.join(words)
excludes = {'\r\n', '下午', '上午', '...'}
for exword in excludes:
    # pop() with a default ignores tokens that never occurred, without
    # hiding unrelated errors the way a bare `except:` would.
    counts.pop(exword, None)

# Sort the (word, count) pairs by count, most frequent first
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

# Draw the keyword chart

# These words are skipped here: media/call markers and the two participant names.
skipped_words = {"通話", "照片", "影片", "貼圖", YOUR_NAME, HER_NAME}

top_words = []
top_counts = []
for word, count in items:
    # The original loop condition (`len(top_words) <= 10`) collects 11
    # entries; that limit is preserved. Iterating over `items` directly
    # simply stops early when fewer eligible words exist, instead of
    # raising IndexError.
    if len(top_words) > 10:
        break
    if word in skipped_words:
        continue
    top_words.append(word)
    top_counts.append(count)

chart = Bar("關鍵字圖表")
chart.set_options(labels=top_words, x_label="單詞", y_label="出現次數")
chart.add_series("次數", top_counts)

# Draw the pie chart comparing photo / video / call counts
media_labels = ['照片', '影片', '通話']
media_counts = [counts.get("照片", 0), counts.get("影片", 0), counts.get("通話", 0)]

chart2 = Pie("通話/影片/照片數統計")
chart2.set_options(labels=media_labels)
chart2.add_series(media_counts)

# Draw the pie chart of message volume per participant.
# The volume is the number of times each name occurs as a token in the log.
sender_counts = [counts.get(YOUR_NAME, 0), counts.get(HER_NAME, 0)]

chart3 = Pie("傳送訊息量")
chart3.set_options(labels=[YOUR_NAME, HER_NAME], inner_radius=0)
chart3.add_series(sender_counts)

# Regular expression for the date header lines: ten characters at the start
# of a line, then the weekday in literal parentheses, then the line break.
# Group 1 captures the weekday. The parentheses must be escaped; unescaped
# they are just nested groups and a line such as "2023/05/01(一)" never matches.
# NOTE(review): assumes headers look like "2023/05/01(一)" — confirm against
# your own export.
pattern = r"(?m)^.{10}\((\w+)\)(?=\n)"

# Dictionary that stores how many times each weekday occurs
weekdays_counts = {}

# Read the file contents (explicit UTF-8, matching how line.txt is written
# and read elsewhere in this script)
with open("line.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Find every date header that matches the regular expression
matches = re.finditer(pattern, content)

# English weekday names mapped to their single-character Chinese labels.
# Labels that are already Chinese (or anything unrecognised) are kept as-is.
weekday_labels = {
    "Monday": "一",
    "Tuesday": "二",
    "Wednesday": "三",
    "Thursday": "四",
    "Friday": "五",
    "Saturday": "六",
    "Sunday": "日",
}

# Count the occurrences per weekday
for match in matches:
    weekday = match.group(1)
    # Convert the weekday name to its Chinese label
    weekday = weekday_labels.get(weekday, weekday)
    weekdays_counts[weekday] = weekdays_counts.get(weekday, 0) + 1

# Print the count for each weekday
for weekday, count in weekdays_counts.items():
    print(f"{weekday}: {count}")

# Draw the weekday statistics bar chart
weekday_names = list(weekdays_counts.keys())
weekday_totals = list(weekdays_counts.values())

chart4 = Bar("星期資料統計")
chart4.set_options(labels=weekday_names, x_label="星期", y_label="次數")
chart4.add_series("次數", weekday_totals)

# Combine the charts into a single page, in the order they were built
page = Page()
for built_chart in (chart, chart2, chart3, chart4):
    page.add(built_chart)

# Save the page as an HTML file
html_file_path = "charts.html"
page.render(html_file_path)

# Download the HTML file from Colab
from google.colab import files
files.download("charts.html")


圖片
  直播研討會
圖片
{{ item.channelVendor }} {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言